# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from zc_core.model.items import Box
from zc_core.util.batch_gen import time_to_batch_no
from crccbuy.rules import *
from datetime import datetime
from zc_core.spiders.base import BaseSpider


# TODO: total data volume: ~2,657,952 records
class SpuSpider(BaseSpider):
    """Spider that harvests the catalog tree and SPU listings from crccbuy.com.

    Flow: fetch the portal index page, emit the parsed catalog tree as a
    ``Box('catalog', ...)`` item, then for every level-3 category under the
    office-supplies root (id "7427") crawl the paginated SPU listing and emit
    ``Box('spu', ...)`` items.
    """
    name = 'spu'
    # Portal index page — source of the catalog tree.
    index_url = 'https://www.crccbuy.com/index.php?app=default&act=trmall'
    # SPU listing URL template, parameterized by (category id, page number).
    spu_list_url = 'https://www.crccbuy.com/index.php?app=search&cate_id={}&page={}&brand=&props=&price=&order=add_time-desc'

    def __init__(self, batchNo=None, *args, **kwargs):
        super(SpuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        print("批次为:", self.batch_no)
        self.page_size = 20
        # Hard cap on listing pages fetched per category; applied in
        # parse_spu_content_deal (it was previously set but never used).
        self.max_page_limit = 100

    def _build_list_req(self, catalog_name, catalog_id, page):
        """Build a GET request for one page of a category's SPU listing.

        The catalog context is carried in ``meta`` so the response handler
        can attribute the parsed SPUs and schedule follow-up pages.
        """
        return Request(
            method='GET',
            url=self.spu_list_url.format(catalog_id, page),
            meta={
                'reqType': 'spu',
                'batchNo': self.batch_no,
                'page': page,
                'catalogName': catalog_name,
                'catalogId': catalog_id,
            },
            callback=self.parse_spu_content_deal,
            errback=self.error_back,
            dont_filter=True
        )

    def start_requests(self):
        # Kick off with the portal index page to harvest categories/brands.
        yield Request(
            url=self.index_url,
            meta={
                'batchNo': self.batch_no,
            },
            callback=self.parse_total_page,
            errback=self.error_back,
            dont_filter=True,
        )

    def parse_total_page(self, response):
        """Parse the catalog tree and schedule page-1 listing requests for
        each level-3 category whose level-1 ancestor is "7427".

        NOTE: this method name shadows the star-imported helper
        ``crccbuy.rules.parse_total_page`` at class level only; the bare call
        inside ``parse_spu_content_deal`` still resolves to the module-level
        helper (methods are not in scope for unqualified names).
        """
        cats = parse_catalog(response)
        if not cats:
            return
        self.logger.info('品类: count[%s]' % len(cats))
        yield Box('catalog', self.batch_no, cats)
        # Build the child -> parent map once instead of re-scanning the full
        # list for every level-3 category (was O(n^2)).
        # Assumes catalogIds are unique — TODO confirm against rules parser.
        parent_of = {c.get('catalogId'): c.get('parentId') for c in cats}
        for cat in cats:
            if cat.get('level') != 3:
                continue
            catalog3_id = cat.get('catalogId')
            catalog3_name = cat.get('catalogName')
            catalog2_id = cat.get('parentId')
            catalog1_id = parent_of.get(catalog2_id)
            if catalog1_id is None:
                # Broken parent chain: log and skip instead of raising
                # IndexError mid-crawl as the previous [..][0] lookup did.
                self.logger.warning(
                    'missing parent chain cat3=%s cat2=%s' % (catalog3_id, catalog2_id))
                continue
            # TODO currently restricted to the office-supplies root ("7427").
            if catalog1_id == "7427":
                yield self._build_list_req(catalog3_name, catalog3_id, 1)

    def parse_spu_content_deal(self, response):
        """Parse one SPU listing page; on page 1, fan out requests for the
        remaining pages (bounded by ``self.max_page_limit``)."""
        meta = response.meta
        catalog_name = meta.get('catalogName')
        catalog_id = meta.get('catalogId')
        cur_page = meta.get('page')
        self.logger.info("清单详情 cat=%s currentPage=%s" % (catalog_id, cur_page))
        spu_list = parse_spu(response)
        if not spu_list:
            self.logger.info("空页 cat=%s currentPage=%s" % (catalog_id, cur_page))
            return
        yield Box('spu', self.batch_no, spu_list)
        if cur_page == 1:
            # Module-level helper from crccbuy.rules (not the method above).
            all_total_page = parse_total_page(response)
            self.logger.info("清单 cat=%s page=%s" % (catalog_id, all_total_page))
            # Apply the configured per-category page cap (previously unused).
            last_page = min(all_total_page, self.max_page_limit)
            for page in range(2, last_page + 1):
                yield self._build_list_req(catalog_name, catalog_id, page)
